/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.fetcher;
import java.io.*;
import java.util.Arrays;
import java.util.Date;
import net.nutch.io.*;
import net.nutch.pagedb.FetchListEntry;
import net.nutch.tools.UpdateDatabaseTool;
/*********************************************
* An entry in the fetcher's output. This includes all of the fetcher output
* except the raw and stripped versions of the content, which are placed in
* separate files.
*
* @author Doug Cutting
*********************************************/
public class FetcherOutput implements Writable {
public static final String DIR_NAME = "fetcher";
public static final String DONE_NAME = "fetcher.done";
public static final String ERROR_NAME = "fetcher.error";
private final static byte VERSION = 3;
public final static byte RETRY = 0;
public final static byte SUCCESS = 1;
public final static byte NOT_FOUND = 2;
private FetchListEntry fetchListEntry;
private MD5Hash md5Hash;
private int status;
private String title = "";
private Outlink[] outlinks;
private long fetchDate;
public FetcherOutput() {}
public FetcherOutput(FetchListEntry fetchListEntry,
MD5Hash md5Hash, int status, String title,
Outlink[] outlinks) {
this.fetchListEntry = fetchListEntry;
this.md5Hash = md5Hash;
this.status = status;
this.title = title != null ? title : "";
this.outlinks = outlinks;
this.fetchDate = System.currentTimeMillis();
}
public byte getVersion() { return VERSION; }
public void readFields(DataInput in) throws IOException {
byte version = in.readByte(); // read version
fetchListEntry = FetchListEntry.read(in);
md5Hash = MD5Hash.read(in);
status = in.readByte();
title = UTF8.readString(in);
int totalOutlinks = in.readInt();
int outlinksToRead = Math.min(UpdateDatabaseTool.MAX_OUTLINKS_PER_PAGE,
totalOutlinks);
outlinks = new Outlink[outlinksToRead];
for (int i = 0; i < outlinksToRead; i++) {
outlinks[i] = Outlink.read(in);
}
for (int i = outlinksToRead; i < totalOutlinks; i++) {
Outlink.skip(in);
}
fetchDate = (version > 1) ? in.readLong() : 0; // added in version=2
}
public void write(DataOutput out) throws IOException {
out.writeByte(VERSION); // store current version
fetchListEntry.write(out);
md5Hash.write(out);
out.writeByte(status);
UTF8.writeString(out, title);
out.writeInt(outlinks.length);
for (int i = 0; i < outlinks.length; i++) {
outlinks[i].write(out);
}
out.writeLong(fetchDate);
}
public static FetcherOutput read(DataInput in) throws IOException {
FetcherOutput fetcherOutput = new FetcherOutput();
fetcherOutput.readFields(in);
return fetcherOutput;
}
//
// Accessor methods
//
public FetchListEntry getFetchListEntry() { return fetchListEntry; }
public MD5Hash getMD5Hash() { return md5Hash; }
public int getStatus() { return status; }
public String getTitle() { return title; }
public Outlink[] getOutlinks() { return outlinks; }
public long getFetchDate() { return fetchDate; }
public void setFetchDate(long fetchDate) { this.fetchDate = fetchDate; }
public boolean equals(Object o) {
if (!(o instanceof FetcherOutput))
return false;
FetcherOutput other = (FetcherOutput)o;
return
this.fetchListEntry.equals(other.fetchListEntry) &&
this.md5Hash.equals(other.md5Hash) &&
(this.status == other.status) &&
this.title.equals(other.title) &&
Arrays.equals(this.outlinks, other.outlinks);
}
public String toString() {
StringBuffer buffer = new StringBuffer();
buffer.append("FetchListEntry: " + fetchListEntry + "Fetch Result:\n" );
buffer.append("MD5Hash: " + md5Hash + "\n" );
buffer.append("Status: " + status + "\n" );
buffer.append("Title: " + title + "\n" );
buffer.append("Outlinks: " + outlinks.length + "\n" );
for (int i = 0; i < outlinks.length; i++) {
buffer.append(" outlink: " + outlinks[i] + "\n");
}
buffer.append("FetchDate: " + new Date(fetchDate) + "\n" );
return buffer.toString();
}
public static void main(String argv[]) throws Exception {
String usage = "FetcherOutput (-recno <recno> | -dumpall) [-filename <filename>]";
if (argv.length == 0 || argv.length > 4) {
System.out.println("usage:" + usage);
return;
}
// Process the args
String filename = FetcherOutput.DIR_NAME;
boolean dumpall = false;
int recno = -1;
for (int i = 0; i < argv.length; i++) {
if ("-recno".equals(argv[i])) {
recno = Integer.parseInt(argv[i+1]);
i++;
} else if ("-dumpall".equals(argv[i])) {
dumpall = true;
} else if ("-filename".equals(argv[i])) {
filename = argv[i+1];
i++;
}
}
// Now carry out the command
ArrayFile.Reader fetcher = new ArrayFile.Reader(filename);
try {
FetcherOutput fo = new FetcherOutput();
if (dumpall) {
while ((fo = (FetcherOutput) fetcher.next(fo)) != null) {
recno++;
System.out.println("Retrieved " + recno + " from file " + filename);
System.out.println(fo);
}
} else if (recno >= 0) {
fetcher.get(recno, fo);
System.out.println("Retrieved " + recno + " from file " + filename);
System.out.println(fo);
}
} finally {
fetcher.close();
}
}
}